
#install.packages('pacman')
pacman::p_load(plm, lmtest, sandwich, tidyverse, tidyr, broom, stargazer,
               fixest, this.path, bacondecomp, did, ggplot2, forcats,
               dplyr, plyr, kableExtra, lubridate)

#sets working directory to current folder
setwd(dirname(this.path()))

#############################
###Loading data

####Main paper data
data_panel<-read.csv('ReplicationData/data_panel.csv', na.strings = c('NA', ''))


#ensuring that the month before exile serves as the base year
data_panel$lead_lags<-factor(data_panel$lead_lags, levels=c('month_before', setdiff(unique(data_panel$lead_lags), 'month_before')))
#storing original date-formatted month
data_panel$month_format<-as.Date(data_panel$month)
#creating non-factor month
data_panel$trend<-as.numeric(as.Date(data_panel$month))
#transforming for plm
pdf<-pdata.frame(data_panel, index=c("actor.id", "month"))


#engagement analysis data + transforming for plm (includes tweets from users w/ientified location)
engagement_panel<-read.csv('ReplicationData/engagement_panel.csv', na.strings = c('NA', ''))
engagement_panel$lead_lags<-factor(engagement_panel$lead_lags, levels=c('month_before', setdiff(unique(data_panel$lead_lags), 'month_before')))
engagement_panel$month_format<-as.Date(engagement_panel$month)
engagement_panel$trend<-as.numeric(as.Date(engagement_panel$month))
engagement_pdf<-pdata.frame(engagement_panel, index=c("actor.id", "month"))


#balanced panel+ transforming for plm
balanced_panel<-read.csv('ReplicationData/balanced_panel.csv', na.strings = c('NA', ''))
balanced_panel$lead_lags<-factor(balanced_panel$lead_lags, levels=c('month_before', setdiff(unique(data_panel$lead_lags), 'month_before')))
balanced_panel$month_format<-as.Date(balanced_panel$month)
balanced_panel$trend<-as.numeric(as.Date(balanced_panel$month))
pdf_balanced<-pdata.frame(balanced_panel, index=c("actor.id", "month"))


#balanced panel with zeros before tweeting+ transforming for plm
dp_balance<-ddply(balanced_panel, .(actor.id), transform, min_tweeting=min(month[num_tweets>0]))
dp_balance$min_tweeting<-as.character(as.Date(dp_balance$min_tweeting))
dp_with_zeros<-subset(dp_balance, month>=min_tweeting)
pdf_wzeros<-pdata.frame(dp_with_zeros, index=c("actor.id", "month"))


###############################################
################Figure functions

#function to produce event study plot
es_plot<-function(model, ylab){
  coef_df<-data.frame(tidy(model, conf.int=T))
  coef_df$conf.low<-coef_df$estimate-coef_df$std.error*1.96
  coef_df$conf.high<-coef_df$estimate+coef_df$std.error*1.96
  coef_df<-subset(coef_df, grepl('lead_lags', coef_df$term)==TRUE)
  coef_df$month_since_exile<-c('pre', -6:-2, 0:11, 'post')
  coef_df<-subset(coef_df, month_since_exile!='pre'&month_since_exile!='post')
  coef_df<-rbind(coef_df, setNames(c('-1', 0, 0, 0, 0, 0, 0, -1), names(coef_df)))
  
  ggplot(coef_df, aes(x=as.numeric(month_since_exile), y=as.numeric(as.character(estimate))))+
    geom_errorbar(aes(ymin=as.numeric(as.character(conf.low)), ymax=as.numeric(as.character(conf.high))), width=.1)+
    geom_line(group=1)+geom_point()+geom_hline(yintercept=0, linetype='dashed')+
    theme_bw()+ ggtitle("")+
    theme(axis.title=element_text(size=22),
          axis.text=element_text(size=22))+
    xlab("Months Since Exile")+ylab(ylab)+
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_line(colour = "black"))+
    geom_vline(xintercept=-0.5, linetype='dotted')+
    theme(plot.title = element_text(hjust = 0.5, size=18))
  
}


#coefficient plot + stargazer tables
# Note: vectorized inputs into element_text() are no longer officially supported in ggplot2 (as of 8/22)
# This affects bolding the independent variable names in coef_plot().
# We use "suppressWarnings" to avoid warnings
#If the feature becomes fully deprecated, comment out the line "theme(axis.text.y=element_text(face=dv_types))+



coef_plot<-function(list_of_m, dv_names, dv_types, model_types,
                    filename, table_title){
  
  num_dvs<-length(dv_names)
  
  mods<-list()
  models<-list()
  ses<-list()
  pvs<-list()
  
  
  for (i in 1:num_dvs){
    if(num_dvs>1){
      dv_model<-list_of_m[[i]]
    }
    if(num_dvs==1){
      dv_model<-list_of_m
    }
    
    dv_mods<-list(dv_model[[4]], dv_model[[5]], dv_model[[6]])
    dv_coefs<-list(dv_model[[1]], dv_model[[2]], dv_model[[3]])
    dv_se<-list(tidy(dv_model[[1]])$std.error, tidy(dv_model[[2]])$std.error, tidy(dv_model[[3]])$std.error)
    dv_p<-list(tidy(dv_model[[1]])$p.value, tidy(dv_model[[2]])$p.value, tidy(dv_model[[3]])$p.value)
    
    mods<-append(mods, dv_mods)
    models<-append(models, dv_coefs)
    ses<-append(ses, dv_se)
    pvs<-append(pvs, dv_p)
    
  }

  end_point<-min(num_dvs, 2)
  
  sg<-NULL
  sg2<-NULL
  sg3<-NULL
  
  sg<-stargazer(mods[1:(end_point*length(model_types))],
                  title=table_title,
                  omit=c('month', 'trend', 'actor.id', 'Constant'),
                  align=TRUE, header=FALSE, label=paste('table:', filename, sep=''),
                  se=ses[1:(end_point*length(model_types))],
                  p =pvs[1:(end_point*length(model_types))],
                  column.labels   = c(dv_names[1:end_point]),
                  column.separate = rep(length(model_types), length(dv_names[1:end_point])),
                  covariate.labels=c('Tweeted from Exile', 'Number of Tweets'),
                  omit.stat=c('f', 'adj.rsq'),
                  model.numbers=F,
                  add.lines=list(c("Month/Year FE", paste(rep(c('Y', 'Y', 'Y'), length(dv_names[1:end_point])))),
                               c('User FE', paste(rep(c('Y', 'N', 'Y'), length(dv_names[1:end_point])))),
                               c('User trend', paste(rep(c('N', 'N', 'Y'), length(dv_names[1:end_point]))))),
                
                  font.size = 'footnotesize',
                  column.sep.width = "-15pt",
                  dep.var.labels.include=FALSE, dep.var.caption = "", no.space=TRUE,
                  notes=c("Robust standard errors clustered at the user level."))
  sg_table<-c(sg)
  
  if(num_dvs>2){
    end_point<-min(num_dvs, 4)
    
    sg2<-stargazer(mods[7:(end_point*length(model_types))],
                  omit=c('month', 'trend', 'actor.id', 'Constant'),
                  align=TRUE, header=FALSE, label=filename,
                  se=ses[7:(end_point*length(model_types))],
                  p =pvs[7:(end_point*length(model_types))],
                  column.labels   = c(dv_names[3:end_point]),
                  column.separate = rep(length(model_types), length(dv_names[3:end_point])),
                  covariate.labels=c('Tweeted from Exile', 'Number of Tweets'),
                  omit.stat=c('f', 'adj.rsq'),
                  model.numbers=F,
                  add.lines=list(c("Month/Year FE", paste(rep(c('Y', 'Y', 'Y'), length(dv_names[3:end_point])))),
                                 c('User FE', paste(rep(c('Y', 'N', 'Y'), length(dv_names[3:end_point])))),
                                 c('User trend', paste(rep(c('N', 'N', 'Y'), length(dv_names[3:end_point]))))),
                  font.size = 'footnotesize',
                  column.sep.width = "-15pt",
                  dep.var.labels.include=FALSE, dep.var.caption = "", no.space=TRUE,
                  notes=c("Robust standard errors clustered at the user level."))
    
    if(end_point/2==round(end_point/2)){
      sg1<-c(sg[1:(length(sg)-11)], sg[(length(sg)-7):(length(sg)-6)], "\\end{tabular} ")
    }
    
    if(end_point/2!=round(end_point/2)){
      sg1<-c(sg[1:(length(sg)-5)] , '\\end{tabular} ')
    }
    
    sg2<-sg2[5:length(sg2)]
    
    sg_table<-c(sg1, sg2)
    
  }
  

  if(num_dvs>4){
    end_point<-min(num_dvs, 6)
    sg3<-stargazer(mods[13:(end_point*length(model_types))],
                   omit=c('month', 'trend', 'actor.id', 'Constant'),
                   align=TRUE, header=FALSE, label=filename,
                   se=ses[13:(end_point*length(model_types))],
                   p =pvs[13:(end_point*length(model_types))],
                   column.labels   = c(dv_names[5:end_point]),
                   column.separate = rep(length(model_types), length(dv_names[5:end_point])),
                   covariate.labels=c('Tweeted from Exile', 'Number of Tweets'),
                   omit.stat=c('f', 'adj.rsq'),
                   model.numbers=F,
                   font.size = 'footnotesize',
                   add.lines=list(c("Month/Year FE", paste(rep(c('Y', 'Y', 'Y'), length(dv_names[5:end_point])))),
                                  c('User FE', paste(rep(c('Y', 'N', 'Y'), length(dv_names[5:end_point])))),
                                  c('User trend', paste(rep(c('N', 'N', 'Y'), length(dv_names[5:end_point]))))),
                   column.sep.width = "-20pt",
                   dep.var.labels.include=FALSE, dep.var.caption = "", no.space=TRUE,
                   notes=c("Robust standard errors clustered at the user level."))

    sg1<-c(sg[1:(length(sg)-5)] , '\\end{tabular} ')
    sg2<-c(sg2[1:(length(sg2)-5)] , '\\end{tabular} ')
    
    sg3<-sg3[5:length(sg3)]
    
    sg_table<-c(sg1, sg2, sg3)
    
  }
  write(sg_table, paste('tables/', filename, '_table.tex', sep=''))
  
    
  
  
  models<-rev(models)
  dv_names<-rev(dv_names)
  dv_types<-rev(dv_types)
  model_types<-rev(model_types)
  coef_df<-subset(data.frame(tidy(models[[1]], coef.int=T)),term=='tweeted_exile') 
  
  for (j in 2:length(models)){
    coef_df_new<-subset(data.frame(tidy(models[[j]], coef.int=T)),term=='tweeted_exile') 
    coef_df<-rbind(coef_df, coef_df_new)
  }
  
  coef_df$conf.low<-coef_df$estimate-coef_df$std.error*1.96
  coef_df$conf.high<-coef_df$estimate+coef_df$std.error*1.96
  coef_df$name<-rep(dv_names, each = length(model_types))
  coef_df$type<-rep(dv_types, each = length(model_types))
  coef_df$model<-rep(model_types, length(dv_names))
  
  
  coef_df$name<-factor(coef_df$name, levels=as.character(unique(coef_df$name)))
  coef_df$model<-factor(coef_df$model, levels=c('Time Trend', 'Two-Way FEs', 'Month FEs'))
  
  plotcoef<-suppressWarnings(ggplot(coef_df, aes(shape=model,linetype=model, x=name, y=estimate))+
    geom_pointrange(aes(ymin=conf.low, ymax=conf.high), position=position_dodge(width=.5), size=.9)+
    theme_bw()+
    theme(legend.position='bottom', axis.title=element_text(size=20),
          legend.justification = c(1,1),
          legend.text=element_text(size=20))+
    coord_flip()+
    ggtitle('')+
    theme(axis.title=element_text(size=22),
          axis.text=element_text(size=22))+
    geom_hline(aes(yintercept=0), lty=4)+
    scale_y_continuous(name='Change in % of Tweets')+
    scale_shape_discrete(name='', guide=guide_legend(reverse=T))+
    scale_linetype_discrete(name='', guide=guide_legend(reverse=T))+
    xlab("")+scale_x_discrete(labels=unique(coef_df$name))+
    theme(axis.text.y=element_text(face=dv_types))+
    theme(plot.title = element_text(hjust = 0.5, size=18))+
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_line(colour = "black")))
  
  ggsave(paste("plots/", filename, "_coefplot.pdf", sep=''), plotcoef, width = 8.5, height = 6.5)
  
  print(plotcoef)
  
}


#function to produce desired 2 way fe models
getting_models<-function(dv, data){
  m1 <- plm(unlist(data[dv])~ tweeted_exile+month+log(num_tweets+1), data = data, model = "within")
  coef1 <- coeftest(m1, vcov = vcovHC(m1, type = "HC1", cluster = "group"))
  
  m2 <- plm(unlist(data[dv])~ tweeted_exile+month+log(num_tweets+1), data = data, model = "pooling")
  coef2 <- coeftest(m2, vcov = vcovHC(m2, type = "HC1", cluster = "group"))
  
  m3 <- plm(unlist(data[dv]) ~ tweeted_exile+month+log(num_tweets+1)+trend*actor.id, data = data, model = "within")
  coef3 <- coeftest(m3, vcov = vcovHC(m3, type = "HC1", cluster = "group"))
  
  list(coef1, coef2, coef3, m1, m2, m3)
}



## adding imprisonment control
getting_models_polpris<-function(dv, data){
  m1 <- plm(unlist(data[dv])~ tweeted_exile+tweeted_prison+month+log(num_tweets+1), data = data, model = "within")
  coef1 <- coeftest(m1, vcov = vcovHC(m1, type = "HC1", cluster = "group"))
  
  m2 <- plm(unlist(data[dv])~ tweeted_exile+tweeted_prison+month+log(num_tweets+1), data = data, model = "pooling")
  coef2 <- coeftest(m2, vcov = vcovHC(m2, type = "HC1", cluster = "group"))
  
  m3 <- plm(unlist(data[dv]) ~ tweeted_exile+tweeted_prison+month+log(num_tweets+1)+trend*actor.id, data = data, model = "within")
  coef3 <- coeftest(m3, vcov = vcovHC(m3, type = "HC1", cluster = "group"))
  
  list(coef1, coef2, coef3, m1, m2, m3)
}


#function to produce desired 2 way fe models w/party controls*trend
getting_models_wparty<-function(dv, data){
  m1 <- plm(unlist(data[dv])~ tweeted_exile+month+log(num_tweets+1)+trend:party, data = data, model = "within")
  coef1 <- coeftest(m1, vcov = vcovHC(m1, type = "HC1", cluster = "group"))
  
  m2 <- plm(unlist(data[dv])~ tweeted_exile+month+log(num_tweets+1)+trend:party, data = data, model = "pooling")
  coef2 <- coeftest(m2, vcov = vcovHC(m2, type = "HC1", cluster = "group"))
  
  m3 <- plm(unlist(data[dv]) ~ tweeted_exile+month+log(num_tweets+1)+trend*actor.id+trend:party, data = data, model = "within")
  coef3 <- coeftest(m3, vcov = vcovHC(m3, type = "HC1", cluster = "group"))
  
  se_list<-list(tidy(coef1)$std.error, tidy(coef2)$std.error, tidy(coef3)$std.error)
  p_list<-list(tidy(coef1)$p.value, tidy(coef2)$p.value, tidy(coef3)$p.value)
  
  list(coef1, coef2, coef3, m1, m2, m3)
}



#function to perform pre/post t-test
ttest_terms<-function(days_since, term){
  t.test(term[data$days_since_exile<days_since&data$days_since_exile>=0],
         term[data$days_since_exile<0&data$days_since_exile>-days_since])
}


#plotting for pre/post t-test
ttest_multiple_periods<-function(periods, term, type, label){
  divisor<-ifelse(type=='percent of mean', mean(term), 1)
  ttest_result<-ttest_terms(periods[1], term)
  conf.low<-ttest_result$conf.int[1]/divisor*100
  conf.high<-ttest_result$conf.int[2]/divisor*100
  est<-(ttest_result$estimate[1]/divisor-ttest_result$estimate[2]/divisor)*100
  tt_df<-data.frame(cbind(conf.low, conf.high, est, label))
  
  for (i in 2:length(periods)){
    ttest_result<-ttest_terms(periods[i], term)
    conf.low<-ttest_result$conf.int[1]/divisor*100
    conf.high<-ttest_result$conf.int[2]/divisor*100
    est<-(ttest_result$estimate[1]/divisor-ttest_result$estimate[2]/divisor)*100
    tt_df<-rbind(tt_df, data.frame(cbind(conf.low, conf.high, est, label)))
  }
  tt_df$period<-periods
  tt_df
}


############################
### additional variable changes

pdf$US_destination<-factor(pdf$US_destination, levels=c('other', 'not_exiled', 'us'))
pdf$CO_destination<-factor(pdf$CO_destination, levels=c('other', 'not_exiled', 'colombia'))





